




//I start with a cross sectional "demographic" file provided by PG&E (confidential)
//It's how I determine who opted on to peak pricing or not and when
insheet using "raw data\combine4_2015_0713_extract.csv", comma clear

rename sa_id sa

gen complier = 0
replace complier = 1 if status_name == "Active" & action_type == "Default" & program_status == "Enrolled"

gen date_str = substr(pdp_start_date,1,10)
gen pdp_start = date(date_str,"YMD")

gen end_date_str = substr(pdp_end_date,1,10)
gen pdp_end = date(end_date_str,"YMD")

drop end_date_str date_str

format pdp_start pdp_end %td

gen pdp_start_year = year(pdp_start)

gen optionally_enrolled = 0
replace optionally_enrolled = 1 if enroll_optional !=""
drop enroll_optional 


keep sa complier status_name action_type action_status program_status optionally_enrolled pdp_start pdp_end
compress *

save "stata data/PDP extract 2015_0713 full data", replace

use "stata data/PDP extract 2015_0713 full data", clear
gen pdp = 0
replace pdp = 1 if status_name == "Active"

keep sa pdp optionally_enrolled

save "stata data/PDP extract 2015_0713 data - useful", replace




//This is a list of critical peak pricing event days for 2008 to 2014 provided by PG&E. Not confidential. 
import excel using "public raw data\PDP 2008-2014.xlsx", firstrow clear case(lower)
rename eventdate date
format date %td
keep date
save "stata data\pdp event dates 2008-2014", replace

/// 2015 PDP event days found here http://www.pge.com/includes/docs/pdfs/mybusiness/energysavingsrebates/demandresponse/cs/November2015_ILPreport.pdf
import excel using "raw data\151224 PDP events for 2015 from internet.xlsx", firstrow clear
//for some reason the date it imports is wrong... so I did it manually
gen date = mdy(month,day,year)
format date %td
keep date
append using "stata data\pdp event dates 2008-2014"
save "stata data/2015 pdp event dates", replace


////////
//TOU Demographic datasets 
////////


use "raw data/TOU_demographic raw", clear //to delete - this came from step2-160209 make TOU demographic file pdated smart meter(replication).do"


gen double lat_all = lat_sp
replace lat_all = lat_prm if lat_sp == 0

gen double long_all = long_sp
replace long_all = long_prm if long_sp == 0

//make week bins
gen days_before = -1*(sp_min_date - 18870)
capture drop bin
gen bin = round((days_before+4)/7)
replace bin = bin -1 if bin <=0
drop days_before

save "stata data\TOU_demographic 2", replace


///
///Make a version with latitude and longitude 
///

use "stata data\TOU_demographic 2", clear
keep sa lat_all long_all
duplicates drop
save "stata data\TOU_demographic 2 lat long", replace


//make just a cliamte zone file
use "stata data\TOU_demographic 2", clear
keep sa climate
rename climate climate_str
encode climate_str, gen(climate)
drop climate_str
save  "stata data\TOU_demographic 2 climate", replace



use "stata data/TOU_demographic 2", clear
keep sa sp_min_date
duplicates drop
drop if sp_min_date == .
rename sp_min_date sm_date
save "stata data/TOU_demographic sm_date", replace




//do the latitude-longitude file with the demopgrahics stuff

use "stata data\TOU_demographic 2", clear


keep naics* sa 
replace naics1 = "0" if naics1 == "44-450"

destring naics*, replace

foreach naics of varlist naics1 naics2 {
replace `naics' = 0 if `naics' == 999900
replace `naics' = 0 if `naics' < 5000
replace `naics' = . if `naics' == 0
}
*

gen naics_most = naics1
replace naics_most = naics2 if naics1 == .

tostring naics_most, gen(naics_most_str)

gen naics_most_2 = substr(naics_most_str,1,2)
destring naics_most_2, replace


//do the somewhat dissaggregated stuff


gen goods_ag_construction = 0
replace goods_ag_construction = 1 if inlist(naics_most_2,11,21,23)

gen goods_utilities = 0
replace goods_utilities = 1 if naics_most_2 == 22

gen goods_manufacturing = 0
replace goods_manufacturing = 1 if inlist(naics_most_2,31,32,33)

gen services_whole_retail_t = 0
replace services_whole_retail_t = 1 if inlist(naics_most_2,42,43,44,45,48,49)

gen services_buisness = 0
replace services_buisness= 1 if inlist(naics_most_2,51,52,53,54,55,56)

gen services_education = 0
replace services_education = 1 if inlist(naics_most_2,61)

gen services_healthcare = 0
replace services_healthcare = 1 if inlist(naics_most_2,62)


gen services_leisure_other = 0
replace services_leisure_other = 1 if inlist(naics_most_2,71,72,81,92)

gen services_food_accom = 0
replace services_food_accom = 1 if inlist(naics_most_2,72)

drop   naics1 naics_most_str

compress *
drop if naics_most_2 == .


gen classification = ""
foreach var of varlist  goods_ag_construction	goods_utilities	goods_manufacturing	services_whole_retail_t	services_buisness	services_education	services_healthcare	services_leisure_other	services_food_accom{
replace classification = "`var'" if `var' == 1
}

save "stata data\TOU_demographic naics", replace


use "stata data\TOU_demographic naics", clear

keep sa naics_most naics_most_2 naics2

save "stata data\TOU_demographic naics only", replace



///
///This one gets used right before the regressons 

insheet using "raw data\bobjin3_2016_0930_extract.txt", comma clear

save "stata data/extract file september 2016", replace

use "stata data/extract file september 2016", clear

gen date_str = substr(pdp_start_date,1,10)
gen pdp_start = date(date_str,"YMD")

gen end_date_str = substr(pdp_end_date,1,10)
gen pdp_end = date(end_date_str,"YMD")

drop end_date_str date_str

format pdp_start pdp_end %td

gen pdp_start_year = year(pdp_start)

gen optionally_enrolled = 0
replace optionally_enrolled = 1 if enroll_optional !=""
drop enroll_optional 

rename sa_id sa

merge 1:1 sa using "stata data/TOU_demographic 2"
keep if _merge == 3

keep sa  status_name action_type action_status program_status optionally_enrolled pdp_start* pdp_end rs stopdate

gen pdp = 0
replace pdp = 1 if program_st == "Enrolled"

//I am defining defaulted as placed on to opt out rate. Based on document with andrew's info
//defaulted is anyone that has an action type. People who opt out are inactive status 

gen opt_out = 0
replace opt_out = 1 if action_type == "Optout"
replace opt_out = 1 if action_type == "Unenroll"

keep if rs == "HA1X"
keep if pdp_start_year == 2014
drop if optionally_enrolled == 1

save "temp/opt out new extract updated", replace
